import os
import sys
# Root of the NOVA project tree; exported to the environment and put on sys.path below.
NOVA_HOME = '/home/projects/hornsteinlab/Collaboration/NOVA'
# Root used for the image inputs (currently the same location as NOVA_HOME).
NOVA_DATA_HOME = '/home/projects/hornsteinlab/Collaboration/NOVA'
os.environ['NOVA_HOME'] = NOVA_HOME
# Preprocessing logs and QC figures sit side by side under the same experiment folder.
LOGS_PATH, PLOT_PATH = (
    os.path.join(NOVA_HOME, 'outputs', 'preprocessing', 'ManuscriptFinalData_80pct', 'neuronsDay8_new', leaf)
    for leaf in ('logs', 'QC_figures')
)
# Insert the project root on sys.path so the `tools.*` imports below can be resolved from it.
sys.path.insert(1, os.environ['NOVA_HOME'])
print(f"NOVA_HOME: {os.getenv('NOVA_HOME')}")
print(os.environ['NOVA_HOME'])
import pandas as pd
import contextlib
import io
from IPython.display import display, Javascript
from tools.preprocessing_tools.qc_reports.qc_utils import log_files_qc, run_validate_folder_structure, display_diff, sample_and_calc_variance, \
show_site_survival_dapi_brenner, show_site_survival_dapi_cellpose, \
show_site_survival_dapi_tiling, show_site_survival_target_brenner, \
calc_total_sums, plot_filtering_heatmap, show_total_sum_tables, \
plot_cell_count, plot_catplot, plot_hm_of_mean_cell_count_per_tile, \
run_calc_hist_new
from tools.preprocessing_tools.qc_reports.qc_config import new_d8_panels, new_d8_markers, new_d8_marker_info, new_d8_cell_lines, new_d8_cell_lines_to_cond,\
new_d8_cell_lines_for_disp, new_d8_reps, new_d8_line_colors, new_d8_lines_order, new_d8_custom_palette,\
new_d8_expected_dapi_raw
%load_ext autoreload
%autoreload 2
NOVA_HOME: /home/projects/hornsteinlab/Collaboration/NOVA /home/projects/hornsteinlab/Collaboration/NOVA
# Choose the batches covered by this report.
batches = [
    'batch1',
    'batch2',
    'batch3',
    'batch7',
    'batch8',
    'batch9',
    'batch10',
]
# The processed-image folders carry the batch name with a 'CLEAN' suffix.
batches_clean = [name + 'CLEAN' for name in batches]
batches
['batch1', 'batch2', 'batch3', 'batch7', 'batch8', 'batch9', 'batch10']
# Load the preprocessing logs of the selected batches into one table.
df = log_files_qc(
    LOGS_PATH,
    batches,
    filename_split='-',
    site_location=0,
)
# Split the rows into the DAPI channel and every other (target) marker.
df_dapi = df.loc[df['marker'] == 'DAPI']
df_target = df.loc[df['marker'] != 'DAPI']
reading logs of batch8 reading logs of batch3 reading logs of batch9 reading logs of batch10 reading logs of batch2 reading logs of batch1 reading logs of batch7 Total of 15 files were read. Before dup handeling (1147717, 21) After duplication removal #1: (1071227, 22) After duplication removal #2: (1071227, 22) PAY ATTENTION!!!! df.site_num: r06c02f14, can be defined using filename_split & site_location
# Validate the folder layout of the raw image tree for the selected batches.
root_directory_raw = os.path.join(NOVA_DATA_HOME, 'input', 'images', 'raw', 'OPERA_indi_sorted')
raws = run_validate_folder_structure(
    root_directory_raw,
    False,  # NOTE(review): presumably a "processed data" flag (the processed-tree call passes True) - confirm
    new_d8_panels,
    new_d8_markers,
    PLOT_PATH,
    new_d8_marker_info,
    new_d8_cell_lines_to_cond,
    new_d8_reps,
    new_d8_cell_lines_for_disp,
    new_d8_expected_dapi_raw,
    batches=batches,
    expected_count=250,
    check_antibody=False,
)
batch1 Folder structure is invalid. Missing 11 paths: /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelA /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelB /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelC /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelD /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelE /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelF /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelG /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelH /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelI /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelJ /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch1/SNCA/panelL No bad files are found. Total Sites: 163997
======== batch2 Folder structure is invalid. Missing 1 paths: /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch2/SNCA 1 files are bad: /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch2/WT/panelA/Untreated/rep2/FMRP, Thumbs.db, ext is .db Total Sites: 164001
======== batch3 Folder structure is invalid. Missing 1 paths: /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch3/SNCA No bad files are found. Total Sites: 163996
======== batch7 Folder structure is valid. No bad files are found. Total Sites: 184494
======== batch8 Folder structure is valid. No bad files are found. Total Sites: 184500
======== batch9 Folder structure is valid. 1 files are bad: /home/projects/hornsteinlab/Collaboration/NOVA/input/images/raw/OPERA_indi_sorted/batch9/WT/panelB/Untreated/rep2/CD41, Thumbs.db, ext is .db Total Sites: 184493
======== batch10 Folder structure is valid. No bad files are found. Total Sites: 184500
======== ====================
# Validate the folder layout of the processed image tree (the '<batch>CLEAN' folders).
root_directory_proc = os.path.join(
    NOVA_DATA_HOME, 'input', 'images', 'processed', 'ManuscriptFinalData_80pct', 'neuronsDay8_new'
)
procs = run_validate_folder_structure(
    root_directory_proc,
    True,  # NOTE(review): presumably a "processed data" flag (the raw-tree call passes False) - confirm
    new_d8_panels,
    new_d8_markers,
    PLOT_PATH,
    new_d8_marker_info,
    new_d8_cell_lines_to_cond,
    new_d8_reps,
    new_d8_cell_lines_for_disp,
    new_d8_expected_dapi_raw,
    batches=batches_clean,
    expected_count=250,
    check_antibody=False,
)
batch1CLEAN Folder structure is invalid. Missing 1 paths: /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch1CLEAN/SNCA No bad files are found. Total Sites: 145369
======== batch2CLEAN Folder structure is invalid. Missing 1 paths: /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch2CLEAN/SNCA No bad files are found. Total Sites: 144929
======== batch3CLEAN Folder structure is invalid. Missing 1 paths: /home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8_new/batch3CLEAN/SNCA No bad files are found. Total Sites: 145953
======== batch7CLEAN Folder structure is valid. No bad files are found. Total Sites: 154031
======== batch8CLEAN Folder structure is valid. No bad files are found. Total Sites: 168274
======== batch9CLEAN Folder structure is valid. No bad files are found. Total Sites: 153650
======== batch10CLEAN Folder structure is valid. No bad files are found. Total Sites: 131289
======== ====================
# Compare the raw and processed validation results per batch; PLOT_PATH is the QC figures folder.
display_diff(batches, raws, procs, PLOT_PATH)
batch1
======== batch2
======== batch3
======== batch7
======== batch8
======== batch9
======== batch10
========
# Estimate a variance figure per processed batch from a sample of its images.
root_directory_proc = os.path.join(NOVA_DATA_HOME, 'input', 'images', 'processed', 'ManuscriptFinalData_80pct',
                                   'neuronsDay8_new')
for batch in batches_clean:
    # Swallow whatever the helper prints so that only the one-line summary below is shown.
    with contextlib.redirect_stdout(io.StringIO()):
        var = sample_and_calc_variance(root_directory_proc, batch,
                                       sample_size_per_markers=200, num_markers=30)
    # Printed outside the redirect, otherwise the summary itself would be swallowed too.
    print(f'{batch} var: ',var)
batch1CLEAN var: 0.05012238092547438 batch2CLEAN var: 0.052469953520045735 batch3CLEAN var: 0.05186614281187016 batch7CLEAN var: 0.04970548078871655 batch8CLEAN var: 0.05053652939497635 batch9CLEAN var: 0.05068049787840124 batch10CLEAN var: 0.05114243272228387
By order of filtering
Percentage out of the total sites
# Site survival after the DAPI Brenner filter; the result is passed on to the Cellpose step.
dapi_filter_by_brenner = show_site_survival_dapi_brenner(
    df_dapi,
    batches,
    new_d8_line_colors,
    new_d8_panels,
    new_d8_reps,
    vmax=250,
    # NOTE(review): 'cell_line_cond' is a bare string here but a list (['SNCA']) in the
    # Cellpose and tiling calls - confirm the helper treats both forms the same way.
    to_ignore={'cell_line_cond':'SNCA','batch':['batch1','batch2','batch3']},
)
Percentage of the sites that passed the previous filter. Absolute values are given in parentheses.
A site will be filtered out if Cellpose found 0 cells in it.
# Site survival after the Cellpose filter, relative to the sites that passed the Brenner filter.
dapi_filter_by_cellpose = show_site_survival_dapi_cellpose(
    df_dapi,
    batches,
    dapi_filter_by_brenner,
    new_d8_line_colors,
    new_d8_panels,
    new_d8_reps,
    figsize=(7,5),
    to_ignore={'cell_line_cond':['SNCA'],'batch':['batch1','batch2','batch3']},
)
Percentage of the sites that passed the previous filter. Absolute values are given in parentheses.
A site is filtered out if, after tiling, no tile contains at least one whole cell detected by Cellpose.
# Site survival after tiling, relative to the sites that passed the Cellpose filter.
dapi_filter_by_tiling = show_site_survival_dapi_tiling(
    df_dapi,
    batches,
    dapi_filter_by_cellpose,
    new_d8_line_colors,
    new_d8_panels,
    new_d8_reps,
    figsize=(7,5),
    to_ignore={'cell_line_cond':['SNCA'],'batch':['batch1','batch2','batch3']},
)
Percentage of the sites that passed the previous filter. Absolute values are given in parentheses (when they differ from the percentages).
# Site survival of the target markers after their Brenner filter, given the DAPI sites kept by tiling.
show_site_survival_target_brenner(df_dapi, df_target, dapi_filter_by_tiling, new_d8_markers, figsize=(7,8))
# Columns handed to calc_total_sums for aggregation.
stats = [
    'n_valid_tiles',
    'site_whole_cells_counts_sum',
    'site_cell_count',
    'site_cell_count_sum',
]
total_sum = calc_total_sums(df_target, df_dapi, stats, new_d8_markers)
# Total valid tiles over every marker whose name does not contain 'TIA'.
total_sum.loc[~total_sum['marker'].str.contains('TIA', regex=True), 'n_valid_tiles'].sum()
9196363
## Total tiles in wt lines
# Non-'TIA' markers, restricted to the two WT conditions.
total_sum.loc[
    (~total_sum['marker'].str.contains('TIA', regex=True))
    & (total_sum['cell_line_cond'].isin(['WT stress', 'WT Untreated'])),
    'n_valid_tiles'
].sum()
2535188
## Total tiles in untreated lines
# Non-'TIA' markers; keeps every condition without 'WT' in its name, plus 'WT Untreated'.
total_sum.loc[
    (~total_sum['marker'].str.contains('TIA', regex=True))
    & (
        (~total_sum['cell_line_cond'].str.contains('WT'))
        | (total_sum['cell_line_cond'] == 'WT Untreated')
    ),
    'n_valid_tiles'
].sum()
7923051
# Sum of the whole-cell counts over the DAPI rows.
total_sum.loc[total_sum['marker'] == 'DAPI', 'site_whole_cells_counts_sum'].sum()
1849083.0
# Sum of the site cell counts over the DAPI rows.
total_sum.loc[total_sum['marker'] == 'DAPI', 'site_cell_count'].sum()
8102443.0
# Render the summary tables built from total_sum (their output follows: one per batch, then all batches).
show_total_sum_tables(total_sum)
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| batch1 | ||||
| count | 6.560000e+02 | 656.000000 | 656.000000 | 6.560000e+02 |
| mean | 2.260840e+03 | 22.608399 | 1419.608232 | 5.842776e+03 |
| std | 7.071879e+02 | 7.071879 | 523.800726 | 1.797175e+03 |
| min | 4.700000e+02 | 4.700000 | 263.000000 | 1.131000e+03 |
| 25% | 1.691500e+03 | 16.915000 | 1077.500000 | 4.754500e+03 |
| 50% | 2.300000e+03 | 23.000000 | 1384.000000 | 5.839000e+03 |
| 75% | 2.791000e+03 | 27.910000 | 1849.750000 | 7.186250e+03 |
| max | 3.763000e+03 | 37.630000 | 2646.000000 | 1.027900e+04 |
| sum | 1.483111e+06 | NaN | 931263.000000 | 3.832861e+06 |
| expected_count | 4.500000e+02 | 450.000000 | 450.000000 | 4.500000e+02 |
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| batch10 | ||||
| count | 7.380000e+02 | 738.000000 | 738.000000 | 7.380000e+02 |
| mean | 1.631982e+03 | 16.319824 | 1137.668022 | 4.661591e+03 |
| std | 1.060984e+03 | 10.609838 | 822.340207 | 2.972323e+03 |
| min | 6.000000e+00 | 0.060000 | 0.000000 | 1.800000e+01 |
| 25% | 7.217500e+02 | 7.217500 | 435.250000 | 2.019500e+03 |
| 50% | 1.567000e+03 | 15.670000 | 1064.500000 | 4.841500e+03 |
| 75% | 2.535750e+03 | 25.357500 | 1668.000000 | 7.075500e+03 |
| max | 3.646000e+03 | 36.460000 | 3335.000000 | 1.107800e+04 |
| sum | 1.204403e+06 | NaN | 839599.000000 | 3.440254e+06 |
| expected_count | 4.500000e+02 | 450.000000 | 450.000000 | 4.500000e+02 |
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| batch2 | ||||
| count | 6.530000e+02 | 653.000000 | 653.000000 | 6.530000e+02 |
| mean | 1.597243e+03 | 15.972435 | 879.632466 | 3.976789e+03 |
| std | 4.735530e+02 | 4.735530 | 268.013557 | 1.037326e+03 |
| min | 1.830000e+02 | 1.830000 | 67.000000 | 4.770000e+02 |
| 25% | 1.285000e+03 | 12.850000 | 689.000000 | 3.362000e+03 |
| 50% | 1.700000e+03 | 17.000000 | 903.000000 | 4.106000e+03 |
| 75% | 1.937000e+03 | 19.370000 | 1069.000000 | 4.688000e+03 |
| max | 2.511000e+03 | 25.110000 | 1670.000000 | 6.325000e+03 |
| sum | 1.043000e+06 | NaN | 574400.000000 | 2.596843e+06 |
| expected_count | 4.500000e+02 | 450.000000 | 450.000000 | 4.500000e+02 |
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| batch3 | ||||
| count | 6.560000e+02 | 656.000000 | 656.000000 | 6.560000e+02 |
| mean | 1.672093e+03 | 16.720930 | 890.144817 | 4.118247e+03 |
| std | 5.010009e+02 | 5.010009 | 272.753277 | 1.072282e+03 |
| min | 3.370000e+02 | 3.370000 | 163.000000 | 7.530000e+02 |
| 25% | 1.358250e+03 | 13.582500 | 715.750000 | 3.512750e+03 |
| 50% | 1.751000e+03 | 17.510000 | 892.000000 | 4.231500e+03 |
| 75% | 2.021250e+03 | 20.212500 | 1077.250000 | 4.890500e+03 |
| max | 2.802000e+03 | 28.020000 | 1696.000000 | 6.807000e+03 |
| sum | 1.096893e+06 | NaN | 583935.000000 | 2.701570e+06 |
| expected_count | 4.500000e+02 | 450.000000 | 450.000000 | 4.500000e+02 |
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| batch7 | ||||
| count | 7.330000e+02 | 733.000000 | 733.000000 | 7.330000e+02 |
| mean | 1.954674e+03 | 19.546739 | 1277.364256 | 5.774357e+03 |
| std | 7.899062e+02 | 7.899062 | 570.143431 | 2.020152e+03 |
| min | 5.000000e+00 | 0.050000 | 2.000000 | 1.500000e+01 |
| 25% | 1.464000e+03 | 14.640000 | 911.000000 | 4.668000e+03 |
| 50% | 2.089000e+03 | 20.890000 | 1227.000000 | 6.048000e+03 |
| 75% | 2.501000e+03 | 25.010000 | 1590.000000 | 7.023000e+03 |
| max | 3.571000e+03 | 35.710000 | 3200.000000 | 9.793000e+03 |
| sum | 1.432776e+06 | NaN | 936308.000000 | 4.232604e+06 |
| expected_count | 4.500000e+02 | 450.000000 | 450.000000 | 4.500000e+02 |
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| batch8 | ||||
| count | 7.380000e+02 | 738.000000 | 738.000000 | 7.380000e+02 |
| mean | 2.165144e+03 | 21.651436 | 1331.829268 | 5.985867e+03 |
| std | 6.710593e+02 | 6.710593 | 378.081507 | 1.386811e+03 |
| min | 1.900000e+02 | 1.900000 | 130.000000 | 4.830000e+02 |
| 25% | 1.826000e+03 | 18.260000 | 1078.250000 | 5.037500e+03 |
| 50% | 2.281500e+03 | 22.815000 | 1316.500000 | 6.050000e+03 |
| 75% | 2.656000e+03 | 26.560000 | 1607.000000 | 7.153000e+03 |
| max | 3.458000e+03 | 34.580000 | 2327.000000 | 9.124000e+03 |
| sum | 1.597876e+06 | NaN | 982890.000000 | 4.417570e+06 |
| expected_count | 4.500000e+02 | 450.000000 | 450.000000 | 4.500000e+02 |
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| batch9 | ||||
| count | 7.350000e+02 | 735.000000 | 735.000000 | 7.350000e+02 |
| mean | 2.093129e+03 | 20.931293 | 1314.273469 | 5.794131e+03 |
| std | 8.432317e+02 | 8.432317 | 523.021712 | 1.994131e+03 |
| min | 0.000000e+00 | 0.000000 | 8.000000 | 1.100000e+01 |
| 25% | 1.413000e+03 | 14.130000 | 999.500000 | 4.595500e+03 |
| 50% | 2.302000e+03 | 23.020000 | 1322.000000 | 6.128000e+03 |
| 75% | 2.758000e+03 | 27.580000 | 1656.500000 | 7.215000e+03 |
| max | 3.634000e+03 | 36.340000 | 2845.000000 | 9.762000e+03 |
| sum | 1.538450e+06 | NaN | 965991.000000 | 4.258686e+06 |
| expected_count | 4.500000e+02 | 450.000000 | 450.000000 | 4.500000e+02 |
| n valid tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| All batches | ||||
| count | 4.909000e+03 | 4909.000000 | 4.909000e+03 | 4.909000e+03 |
| mean | 1.914139e+03 | 19.141391 | 1.184434e+03 | 5.190546e+03 |
| std | 7.936189e+02 | 7.936189 | 5.542821e+02 | 2.048276e+03 |
| min | 0.000000e+00 | 0.000000 | 0.000000e+00 | 1.100000e+01 |
| 25% | 1.390000e+03 | 13.900000 | 8.030000e+02 | 3.881000e+03 |
| 50% | 1.976000e+03 | 19.760000 | 1.133000e+03 | 5.174000e+03 |
| 75% | 2.484000e+03 | 24.840000 | 1.521000e+03 | 6.658000e+03 |
| max | 3.763000e+03 | 37.630000 | 3.335000e+03 | 1.107800e+04 |
| sum | 9.396509e+06 | NaN | 5.814386e+06 | 2.548039e+07 |
| expected_count | 4.500000e+02 | 450.000000 | 4.500000e+02 | 4.500000e+02 |
For each batch, cell line, replicate and marker: total number of tiles
# The tile counts are passed to plot_filtering_heatmap under the column name 'index'.
to_heatmap = total_sum.rename(columns={'n_valid_tiles':'index'})
plot_filtering_heatmap(
    to_heatmap,
    extra_index='marker',
    vmin=None,
    vmax=None,
    xlabel='Total number of tiles',
    show_sum=True,
    figsize=(6,8),
)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:391: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:391: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:391: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:391: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:391: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:391: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:391: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
For each cell line, replicate and marker in batch7: total number of whole cells
# Same heatmap as for the tiles, but on whole-cell counts and restricted to batch7.
to_heatmap = total_sum.rename(columns={'site_whole_cells_counts_sum':'index'})
plot_filtering_heatmap(
    to_heatmap.loc[to_heatmap['batch'] == 'batch7'],
    extra_index='marker',
    vmin=None,
    vmax=None,
    xlabel='Total number of whole cells',
    show_sum=True,
    figsize=(6,8),
)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:391: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
# Drop the DAPI sites that ended up with no valid tiles before plotting the cell counts.
df_no_empty_sites = df_dapi.loc[df_dapi['n_valid_tiles'] != 0]
# One plot per cell-count column, all with the same line order and palette.
plot_cell_count(
    df_no_empty_sites,
    new_d8_lines_order,
    new_d8_custom_palette,
    y='site_cell_count_sum',
    title='Cell Count Average per Site (from tiles)',
)
plot_cell_count(
    df_no_empty_sites,
    new_d8_lines_order,
    new_d8_custom_palette,
    y='site_whole_cells_counts_sum',
    title='Whole Cell Count Average per Site',
)
plot_cell_count(
    df_no_empty_sites,
    new_d8_lines_order,
    new_d8_custom_palette,
    y='site_cell_count',
    title='Cellpose Cell Count Average per Site',
)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order, /home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:618: UserWarning: The palette list has more values (10) than needed (9), which may not be intended. c = sns.barplot(data=batch, x='rep', hue='cell_line_cond', y=y, hue_order = order,
# Mean number of valid tiles per DAPI site, for each cell line / condition.
df_dapi.groupby(['cell_line_cond'])['n_valid_tiles'].mean()
cell_line_cond FUSHeterozygous 7.729141 FUSHomozygous 8.147912 FUSRevertant 7.297871 OPTN 8.778378 SNCA 6.696576 TBK1 7.475947 TDP43 9.103717 WT Untreated 9.866203 WT stress 9.851510 Name: n_valid_tiles, dtype: float64
# Plot the valid-tile counts of the DAPI sites for the selected batches.
# NOTE(review): the SettingWithCopyWarning in the output below is raised inside qc_utils
# (assignment of 'batch_rep'); df_dapi is a filtered slice of df - confirm whether a copy is needed.
plot_catplot(df_dapi, new_d8_custom_palette,new_d8_reps, x='n_valid_tiles', x_title='valid tiles count', batches=batches)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1058: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df.loc[:, 'batch_rep'] = df['batch'] + " " + df['rep']
# Heatmap of the mean cell count per tile: cell lines as rows, panels as columns, split by replicate.
plot_hm_of_mean_cell_count_per_tile(df_dapi, split_by='rep', rows='cell_line', columns='panel', vmax=3)
# Mean of the 'cells_count_in_valid_tiles_mean' column over all DAPI rows.
df_dapi.loc[:, ['cells_count_in_valid_tiles_mean']].mean()
cells_count_in_valid_tiles_mean 1.744963 dtype: float64
# Mean of the 'site_cell_count' column over all DAPI rows.
df_dapi.loc[:, ['site_cell_count']].mean()
site_cell_count 22.841799 dtype: float64
# for batch in batches:
# print(batch)
# #batch_num = batch.replace('batch',"")
# run_calc_hist_new(batch,new_d8_cell_lines_for_disp, new_d8_markers, root_directory_raw, root_directory_proc,
# hist_sample=10,sample_size_per_markers=200, ncols=7, nrows=4)
# print("="*30)
# # save notebook as HTML ( the HTML will be saved in the same folder the original script is)
# display(Javascript('IPython.notebook.save_checkpoint();'))
# os.system(f'jupyter nbconvert --to html {NOVA_HOME}/tools/preprocessing_tools/qc_reports/qc_report_new_d8_80pct.ipynb --output {NOVA_HOME}/manuscript/preprocessing_qc_reports/qc_report_new_d8_80pct.html')